# analysis_SI_section_7.R
# This script replicates the analysis in Supporting Information Section 7 (referenced in Section 4.3 of the Main Text) of the paper
#   Table SI.9, Table SI.10, Table SI.11, and Table SI.12
# Author: Yimeng Li
# Dependencies:
#   Data/VR_VoteCal_LA.Rdata

# Load libraries:
library(dplyr)
library(rdrobust)
library(RItools)
library(tidyr)

# Load data:
load("./Replication Package/Data/VR_VoteCal_LA.Rdata")
load("./Replication Package/Data/ACS cleaned/ACS_educ_cat_LA_clean.RData")
load("./Replication Package/Data/ACS cleaned/ACS_hh_income_cat_LA_clean.RData")
load("./Replication Package/Data/ACS cleaned/ACS_rent_cat_LA_clean.RData")
load("./Replication Package/Data/ACS cleaned/ACS_value_cat_LA_clean.RData")

# Function to append Census socioeconomic data to VR file:
append_ACS <- function(data, ACS_data, var_names) {
  all_vars <- c("census_state", "census_county",
                "census_tract", "census_block_group",
                var_names)
  data_appended <- data %>%
    left_join(ACS_data %>% select(all_of(all_vars)),
              by = c("cxy_state_id" = "census_state",
                     "cxy_county_id" = "census_county",
                     "cxy_tract_id" = "census_tract",
                     "cxy_block_group_id" = "census_block_group"))
  return(data_appended)
}

# Prepare data for analysis
VR_VoteCal_LA <- VR_VoteCal_LA %>%
  mutate(
    Age_18_to_29 = if_else(Age >= 18 & Age <= 29, 1, 0),
    Age_30_to_44 = if_else(Age >= 30 & Age <= 44, 1, 0),
    Age_45_to_64 = if_else(Age >= 45 & Age <= 64, 1, 0),
    Age_65_or_older = if_else(Age >= 65, 1, 0),
    cxy_block_group_id = as.integer(substr(cxy_block_id, 1, 1))
  ) %>%
  append_ACS(ACS_educ_cat_LA_clean,
             c('Perc_no_high_school', 'Perc_high_school',
               'Perc_some_college', 'Perc_bachelor', 'Perc_postgraduate')) %>%
  append_ACS(ACS_hh_income_cat_LA_clean,
             c('Perc_less_than_35000', 'Perc_35000_to_74999',
               'Perc_75000_to_124999', 'Perc_125000_or_more')) %>%
  append_ACS(ACS_rent_cat_LA_clean,
             c('Perc_less_than_1000', 'Perc_1000_to_1499',
               'Perc_1500_to_1999', 'Perc_2000_or_more')) %>%
  append_ACS(ACS_value_cat_LA_clean,
             c('Perc_less_than_500k', 'Perc_500k_to_750k',
               'Perc_750k_to_1m', 'Perc_1m_or_more'))

#*******************************************************************************
# Table SI.9
#   Demographic Composition of Voters Residing within Specified Distance of
#     the Boundary of Universal and Non-Universal Vote-by-Mail Districts
#*******************************************************************************

# Function to Calculate the Demographic Composition of Voters Residing
#   within Specified Distance of the Boundary
get_age_gender_race <- function(data_all, max_dist) {
  
  data_subset <- data_all %>%
    filter(abs_dist <= max_dist)
  
  summary_age_gender_race <- data_subset %>%
    summarise(
      prop_18_to_29 = mean(Age_18_to_29, na.rm = TRUE),
      prop_30_to_44 = mean(Age_30_to_44, na.rm = TRUE),
      prop_45_to_64 = mean(Age_45_to_64, na.rm = TRUE),
      prop_65_or_older = mean(Age_65_or_older, na.rm = TRUE),
      prop_female = mean(gender_inf, na.rm = TRUE),
      prop_white = mean(pred.whi, na.rm = TRUE),
      prop_black = mean(pred.bla, na.rm = TRUE),
      prop_hispanic = mean(pred.his, na.rm = TRUE),
      prop_asian = mean(pred.asi, na.rm = TRUE),
      prop_other = mean(pred.oth, na.rm = TRUE)
    ) %>%
    pivot_longer(
      cols = everything(),
      names_to = c(".value", "variable"),
      names_pattern = "(prop)_(.*)"
    ) %>%
    transmute(
      variable,
      Perc = 100*prop,
      max_dist = max_dist
    )
  
  return(summary_age_gender_race)
}

# Table SI.9
Table_SI9 <- bind_rows(
  get_age_gender_race(VR_VoteCal_LA, Inf),
  get_age_gender_race(VR_VoteCal_LA, 10000),
  get_age_gender_race(VR_VoteCal_LA, 5000),
  get_age_gender_race(VR_VoteCal_LA, 2000),
  get_age_gender_race(VR_VoteCal_LA, 1000),
  get_age_gender_race(VR_VoteCal_LA, 500)
) %>%
  pivot_wider(
    names_from = max_dist,
    values_from = Perc,
  ) %>%
  mutate(across(-variable, ~round(.)))

# Save Table
write.csv(Table_SI9, "./Replication Package/Table SI.9.csv", row.names = FALSE, na = '')

#*******************************************************************************
# Table SI.10
#   Socioeconomic Composition of Voters Residing within Specified Distance of
#     the Boundary of Universal and Non-Universal Vote-by-Mail Districts
#*******************************************************************************

# All socioeconomic variables of interest
all_vars_socioecon <- c(
  c('Perc_no_high_school', 'Perc_high_school',
    'Perc_some_college', 'Perc_bachelor', 'Perc_postgraduate'),
  c('Perc_less_than_35000', 'Perc_35000_to_74999',
    'Perc_75000_to_124999', 'Perc_125000_or_more'),
  c('Perc_less_than_1000', 'Perc_1000_to_1499',
    'Perc_1500_to_1999', 'Perc_2000_or_more'),
  c('Perc_less_than_500k', 'Perc_500k_to_750k',
    'Perc_750k_to_1m', 'Perc_1m_or_more')
)

# Function to Calculate the Socioeconomic Composition of Voters Residing
#   within Specified Distance of the Boundary
get_socioecon <- function(data_all, max_dist) {
  
  data_subset <- data_all %>%
    filter(abs_dist <= max_dist)
  
  summary_socioecon <- data_subset %>%
    summarise(across(all_of(all_vars_socioecon), mean, na.rm = TRUE)) %>%
    pivot_longer(
      cols = everything(),
      names_to = c(".value", "variable"),
      names_pattern = "(Perc)_(.*)"
    ) %>%
    transmute(
      variable,
      Perc = 100*Perc,
      max_dist = max_dist
    )
  
  return(summary_socioecon)
}

# Table SI.10
Table_SI10 <- bind_rows(
  get_socioecon(VR_VoteCal_LA, Inf),
  get_socioecon(VR_VoteCal_LA, 10000),
  get_socioecon(VR_VoteCal_LA, 5000),
  get_socioecon(VR_VoteCal_LA, 2000),
  get_socioecon(VR_VoteCal_LA, 1000),
  get_socioecon(VR_VoteCal_LA, 500)
) %>%
  pivot_wider(
    names_from = max_dist,
    values_from = Perc,
  ) %>%
  mutate(across(-variable, ~round(.)))

# save Table
write.csv(Table_SI10, "./Replication Package/Table SI.10.csv", row.names = FALSE, na = '')

#*******************************************************************************
# Table SI.11
#   Demographic Comparison between Universal and Non-Universal VBM Districts as a Function of Distance
#*******************************************************************************

# Function to calculate the number of observations by Permanent VBM status and maximal distance from boundary
get_N_by_condition <- function(data_all, PVBM, max_dist) {
  
  data_subset <- data_all %>%
    filter(IsPermanentVBM == PVBM, abs_dist <= max_dist)
  
  N <- data_subset %>%
    group_by(UVBM) %>%
    count() %>%
    mutate(IsPermanentVBM = PVBM, max_dist = max_dist) %>%
    ungroup()
  
  return(N)
}

# Calculate the number of observations by Permanent VBM status and maximal distance from boundary
N_by_condition <- bind_rows(
  get_N_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", Inf),
  get_N_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", 10000),
  get_N_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", 5000),
  get_N_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", 2000),
  get_N_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", 1000),
  get_N_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", 500),
  get_N_by_condition(VR_VoteCal_LA, "Perm. VBM", Inf),
  get_N_by_condition(VR_VoteCal_LA, "Perm. VBM", 10000),
  get_N_by_condition(VR_VoteCal_LA, "Perm. VBM", 5000),
  get_N_by_condition(VR_VoteCal_LA, "Perm. VBM", 2000),
  get_N_by_condition(VR_VoteCal_LA, "Perm. VBM", 1000),
  get_N_by_condition(VR_VoteCal_LA, "Perm. VBM", 500)
) %>%
  pivot_wider(
    names_from = max_dist,
    values_from = n,
  )

# Function to calculate balance statistic (chi-sq) by Permanent VBM status and maximal distance from boundary
get_cov_balance_by_condition <- function(data_all, PVBM, max_dist) {
  
  data_subset <- data_all %>%
    filter(IsPermanentVBM == PVBM, abs_dist <= max_dist)
  
  cov_balance <- xBalance(UVBM ~ Age + gender_inf + pred.whi + pred.bla + pred.his + pred.asi,
                          data = data_subset, report = c("chisquare.test"))
  
  cov_balance_result <- c(cov_balance$overall$chisquare, PVBM, max_dist)
  
  return(cov_balance_result)
}

# Calculate balance statistic (chi-sq) by Permanent VBM status and maximal distance from boundary
cov_balance_by_condition <- rbind(
  get_cov_balance_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", Inf),
  get_cov_balance_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", 10000),
  get_cov_balance_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", 5000),
  get_cov_balance_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", 2000),
  get_cov_balance_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", 1000),
  get_cov_balance_by_condition(VR_VoteCal_LA, "Non-Perm. VBM", 500),
  get_cov_balance_by_condition(VR_VoteCal_LA, "Perm. VBM", Inf),
  get_cov_balance_by_condition(VR_VoteCal_LA, "Perm. VBM", 10000),
  get_cov_balance_by_condition(VR_VoteCal_LA, "Perm. VBM", 5000),
  get_cov_balance_by_condition(VR_VoteCal_LA, "Perm. VBM", 2000),
  get_cov_balance_by_condition(VR_VoteCal_LA, "Perm. VBM", 1000),
  get_cov_balance_by_condition(VR_VoteCal_LA, "Perm. VBM", 500)
) %>%
  as.data.frame() %>%
  pivot_wider(
    names_from = V3,
    values_from = V1,
  )

# Table SI.11
Table_SI11 <- bind_rows(
  cov_balance_by_condition %>%
    mutate(`Inf` = as.numeric(`Inf`),
           `10000` = as.numeric(`10000`),
           `5000` = as.numeric(`5000`),
           `2000` = as.numeric(`2000`),
           `1000` = as.numeric(`1000`),
           `500` = as.numeric(`500`)) %>%
    rename(IsPermanentVBM = V2) %>%
    mutate(statistic = "Chi_Squared Statistic") %>%
    relocate(statistic),
  N_by_condition %>%
    mutate(statistic = case_when(UVBM == 1 ~ "UVBM sample size",
                                 UVBM == 0 ~ "Non-UVBM sample size")) %>%
    select(-UVBM) %>%
    relocate(statistic)
) %>%
  mutate(statistic = factor(statistic, levels = c("Chi_Squared Statistic", "UVBM sample size", "Non-UVBM sample size"))) %>%
  arrange(IsPermanentVBM, statistic) %>%
  mutate(across(-c(statistic, IsPermanentVBM), ~round(.)))

# Save Table
write.csv(Table_SI11, "./Replication Package/Table SI.11.csv", row.names = FALSE, na = '')

#*******************************************************************************
# Table SI.12
#   Demographic Comparison between Universal and Non-Universal VBM Districts Using Placebo RD
#*******************************************************************************

# Prepare data for analysis
Voters_NPAV_within_10km <- VR_VoteCal_LA %>%
  filter(IsPermanentVBM == "Non-Perm. VBM", abs_dist <= 10000)

Voters_PAV_within_10km <- VR_VoteCal_LA %>%
  filter(IsPermanentVBM == "Perm. VBM", abs_dist <= 10000)

# Non-Permanent VBM Voters
result_NPAV_18_to_29 <- rdrobust(
  Voters_NPAV_within_10km$Age_18_to_29,
  Voters_NPAV_within_10km$dist_from_boundary,
  cluster = Voters_NPAV_within_10km$Dist
)

result_NPAV_30_to_44 <- rdrobust(
  Voters_NPAV_within_10km$Age_30_to_44,
  Voters_NPAV_within_10km$dist_from_boundary,
  cluster = Voters_NPAV_within_10km$Dist
)

result_NPAV_45_to_64 <- rdrobust(
  Voters_NPAV_within_10km$Age_45_to_64,
  Voters_NPAV_within_10km$dist_from_boundary,
  cluster = Voters_NPAV_within_10km$Dist
)

result_NPAV_65_or_older <- rdrobust(
  Voters_NPAV_within_10km$Age_65_or_older,
  Voters_NPAV_within_10km$dist_from_boundary,
  cluster = Voters_NPAV_within_10km$Dist
)

result_NPAV_gender <- rdrobust(
  Voters_NPAV_within_10km$gender_inf,
  Voters_NPAV_within_10km$dist_from_boundary,
  cluster = Voters_NPAV_within_10km$Dist
)

result_NPAV_whi <- rdrobust(
  Voters_NPAV_within_10km$pred.whi,
  Voters_NPAV_within_10km$dist_from_boundary,
  cluster = Voters_NPAV_within_10km$Dist
)

result_NPAV_bla <- rdrobust(
  Voters_NPAV_within_10km$pred.bla,
  Voters_NPAV_within_10km$dist_from_boundary,
  cluster = Voters_NPAV_within_10km$Dist
)

result_NPAV_his <- rdrobust(
  Voters_NPAV_within_10km$pred.his,
  Voters_NPAV_within_10km$dist_from_boundary,
  cluster = Voters_NPAV_within_10km$Dist
)

result_NPAV_asi <- rdrobust(
  Voters_NPAV_within_10km$pred.asi,
  Voters_NPAV_within_10km$dist_from_boundary,
  cluster = Voters_NPAV_within_10km$Dist
)

result_NPAV_oth <- rdrobust(
  Voters_NPAV_within_10km$pred.oth,
  Voters_NPAV_within_10km$dist_from_boundary,
  cluster = Voters_NPAV_within_10km$Dist
)

# Permanent VBM Voters
result_PAV_18_to_29 <- rdrobust(
  Voters_PAV_within_10km$Age_18_to_29,
  Voters_PAV_within_10km$dist_from_boundary,
  cluster = Voters_PAV_within_10km$Dist
)

result_PAV_30_to_44 <- rdrobust(
  Voters_PAV_within_10km$Age_30_to_44,
  Voters_PAV_within_10km$dist_from_boundary,
  cluster = Voters_PAV_within_10km$Dist
)

result_PAV_45_to_64 <- rdrobust(
  Voters_PAV_within_10km$Age_45_to_64,
  Voters_PAV_within_10km$dist_from_boundary,
  cluster = Voters_PAV_within_10km$Dist
)

result_PAV_65_or_older <- rdrobust(
  Voters_PAV_within_10km$Age_65_or_older,
  Voters_PAV_within_10km$dist_from_boundary,
  cluster = Voters_PAV_within_10km$Dist
)

result_PAV_gender <- rdrobust(
  Voters_PAV_within_10km$gender_inf,
  Voters_PAV_within_10km$dist_from_boundary,
  cluster = Voters_PAV_within_10km$Dist
)

result_PAV_whi <- rdrobust(
  Voters_PAV_within_10km$pred.whi,
  Voters_PAV_within_10km$dist_from_boundary,
  cluster = Voters_PAV_within_10km$Dist
)

result_PAV_bla <- rdrobust(
  Voters_PAV_within_10km$pred.bla,
  Voters_PAV_within_10km$dist_from_boundary,
  cluster = Voters_PAV_within_10km$Dist
)

result_PAV_his <- rdrobust(
  Voters_PAV_within_10km$pred.his,
  Voters_PAV_within_10km$dist_from_boundary,
  cluster = Voters_PAV_within_10km$Dist
)

result_PAV_asi <- rdrobust(
  Voters_PAV_within_10km$pred.asi,
  Voters_PAV_within_10km$dist_from_boundary,
  cluster = Voters_PAV_within_10km$Dist
)

result_PAV_oth <- rdrobust(
  Voters_PAV_within_10km$pred.oth,
  Voters_PAV_within_10km$dist_from_boundary,
  cluster = Voters_PAV_within_10km$Dist
)

# Table SI.12
Table_SI12 <- left_join(
  rbind(
    c("NPAV", "18 to 29", result_NPAV_18_to_29$coef[3], result_NPAV_18_to_29$se[3]),
    c("NPAV", "30 to 44", result_NPAV_30_to_44$coef[3], result_NPAV_30_to_44$se[3]),
    c("NPAV", "45 to 64", result_NPAV_45_to_64$coef[3], result_NPAV_45_to_64$se[3]),
    c("NPAV", "65 or older", result_NPAV_65_or_older$coef[3], result_NPAV_65_or_older$se[3]),
    c("NPAV", "Female", result_NPAV_gender$coef[3], result_NPAV_gender$se[3]),
    c("NPAV", "White", result_NPAV_whi$coef[3], result_NPAV_whi$se[3]),
    c("NPAV", "Black", result_NPAV_bla$coef[3], result_NPAV_bla$se[3]),
    c("NPAV", "Hispanic", result_NPAV_his$coef[3], result_NPAV_his$se[3]),
    c("NPAV", "Asian", result_NPAV_asi$coef[3], result_NPAV_asi$se[3]),
    c("NPAV", "Other", result_NPAV_oth$coef[3], result_NPAV_oth$se[3])
  ) %>% as.data.frame() %>% transmute(NPAV = V1, Variable = V2, Estimate = round(100*as.numeric(V3), 1), Clustered_SE = round(100*as.numeric(V4), 1)),
  rbind(
    c("PAV", "18 to 29", result_PAV_18_to_29$coef[3], result_PAV_18_to_29$se[3]),
    c("PAV", "30 to 44", result_PAV_30_to_44$coef[3], result_PAV_30_to_44$se[3]),
    c("PAV", "45 to 64", result_PAV_45_to_64$coef[3], result_PAV_45_to_64$se[3]),
    c("PAV", "65 or older", result_PAV_65_or_older$coef[3], result_PAV_65_or_older$se[3]),
    c("PAV", "Female", result_PAV_gender$coef[3], result_PAV_gender$se[3]),
    c("PAV", "White", result_PAV_whi$coef[3], result_PAV_whi$se[3]),
    c("PAV", "Black", result_PAV_bla$coef[3], result_PAV_bla$se[3]),
    c("PAV", "Hispanic", result_PAV_his$coef[3], result_PAV_his$se[3]),
    c("PAV", "Asian", result_PAV_asi$coef[3], result_PAV_asi$se[3]),
    c("PAV", "Other", result_PAV_oth$coef[3], result_PAV_oth$se[3])
  ) %>% as.data.frame() %>% transmute(PAV = V1, Variable = V2, Estimate = round(100*as.numeric(V3), 1), Clustered_SE = round(100*as.numeric(V4), 1)),
  by = "Variable", suffix = c(".NPAV", ".PAV"))

# Save Table
write.csv(Table_SI12, "./Replication Package/Table SI.12.csv", row.names = FALSE, na = '')
